####################################################################
#***2019 analysis
## Packages
# To install and open the R packages that you need for this code. 
# Packages attached with library() for the rest of the script.
need <- c(
  "tidyverse", "readstata13", "lfe", "glue", "rdrobust", "stargazer",
  "arm", "broom", "ggplot2", "dotwhisker", "gridExtra", "ggeffects"
)
# rstudioapi is only ever called through `::` (to locate this script), so it
# has to be installed but does not need to be attached.
installable <- c(need, "rstudioapi")
have <- installable %in% rownames(installed.packages())
if (any(!have)) install.packages(installable[!have])
invisible(lapply(need, library, character.only = TRUE))

# Change the path to wherever you place the models.
# Set up the working directory: first the folder containing this script, then
# its parent, so that relative paths such as "5prepdata/..." resolve.
script_folder <- if (requireNamespace("rstudioapi", quietly = TRUE) &&
                     rstudioapi::isAvailable()) {
  # Interactive RStudio session: use the file open in the source editor.
  dirname(rstudioapi::getSourceEditorContext()$path)
} else {
  # Rscript passes the script location as a `--file=` argument.
  file_arg <- grep("^--file=", commandArgs(trailingOnly = FALSE), value = TRUE)
  if (length(file_arg) == 0) {
    stop(
      "Cannot determine the script location: run this file from RStudio or ",
      "with Rscript.",
      call. = FALSE
    )
  }
  dirname(normalizePath(sub("^--file=", "", file_arg[1])))
}
setwd(script_folder)
# Start from an empty workspace (this also drops the helper variables above).
rm(list = ls())
setwd("../")


# Read the candidate-level file, drop the per-cycle ran/won primary and
# general columns, and add the derived regressors in a single pipeline.
rd.full <- read.dta13("5prepdata/AllCandRDForAnalysis.dta") %>%
  dplyr::select(-ranprim_h1:-ranprim_s25, -winprim_h1:-winsenate25) %>%
  ungroup() %>%
  mutate(
    # interaction of the win indicator with the victory margin
    f_x_win = won_election * victory_marg,
    # 1 when the party label contains "Republican", 0 otherwise
    rep = ifelse(str_detect(cand_party, "Republican"), 1, 0),
    # 1 for every election type other than "G", 0 otherwise
    spc_elec = ifelse(election_type != "G", 1, 0)
  )



## load in RD data:
# NOTE(review): load() restores objects under their saved names and silently
# overwrites same-named objects already in the workspace -- confirm the file
# does not replace `rd.full`.
load("5prepdata/final_rd_data.RData")
# NOTE(review): `rd.data` does not exist in the workspace before the load()
# above, so it presumably comes from the .RData file -- confirm.
table(rd.data$rep)

# Running tallies per candidate.
# A temporary row id lets the tallies be merged back on a single explicit key.
# The previous natural join matched on every shared column, which duplicates
# rows whenever two records are identical and depends on exact equality of
# floating-point columns.
rd.full <- rd.full %>%
  ungroup() %>%
  mutate(.row_id = row_number())

# arrange() sorts the whole table by year (grouping is ignored by default), so
# row_number() and cumsum() run in chronological order within each candidate.
cand.lost <- rd.full %>%
  group_by(cand_id) %>%
  arrange(year) %>%
  mutate(
    times_run = row_number(),          # races entered so far, incl. this one
    times_won = cumsum(won_election),  # races won so far, incl. this one
    lost = times_run - times_won       # races lost so far, incl. this one
  )

# Attach only the three tallies, preserving the original row order of rd.full.
rd.full <- rd.full %>%
  left_join(
    cand.lost %>%
      ungroup() %>%
      dplyr::select(.row_id, times_run, times_won, lost),
    by = ".row_id"
  ) %>%
  dplyr::select(-.row_id)

# Sample of candidates with a previous loss.
# Step 1: keep races where the candidate has already run at least once before.
is_repeat_run <- rd.full$times_run > 1
rd.data.losses <- rd.full[is_repeat_run, ]
table(rd.data.losses$lost)

# Step 2: `lost` includes the current race, so take off the current race when
# it was a loss (assumes won_election is coded 0/1 -- TODO confirm).
lost_this_race <- abs(rd.data.losses$won_election - 1)
rd.data.losses$prev.losses <- rd.data.losses$lost - lost_this_race
table(rd.data.losses$prev.losses)

# Step 3: keep only races preceded by at least one loss.
has_prior_loss <- rd.data.losses$prev.losses > 0
rd.data.losses <- rd.data.losses[has_prior_loss, ]

# NOTE(review): this blanks the outcome for the first row only; the reason is
# not evident from this script -- confirm it is intentional.
rd.data.losses$ever_winprim_h[1] <- NA
table(rd.data.losses$won_election)

### by any previous losses
# Main RD specification on the previous-loss sample.
#
# Selects a bandwidth with rdbwselect() (local linear, triangular kernel,
# "mserd"), builds triangular kernel weights around a cutoff of 0, and fits a
# kernel-weighted felm() regression on the observations inside the bandwidth,
# with year and state fixed effects and standard errors clustered by state.
#
# Args:
#   x: outcome vector, row-aligned with the global `rd.data.losses`.
# Returns:
#   The fitted felm object.
main.specification.addl <- function(x) {
  # Assigning to `rd.data.losses` inside the function creates a local copy;
  # the global data frame is left untouched. Clear any stale helper columns.
  rd.data.losses$cutoff <- NULL
  rd.data.losses$bandwidth <- NULL
  rd.data.losses$kw <- NULL

  # rdbwselect() expects the covariates as a matrix with one column per
  # control. Adding the vectors with `+` collapsed them into a single column
  # holding their arithmetic sum, so the controls were not actually used as
  # separate covariates when selecting the bandwidth.
  covariates <- cbind(
    rd.data.losses$running_terms_served,
    rd.data.losses$dem,
    rd.data.losses$rep,
    rd.data.losses$spc_elec,
    rd.data.losses$term_length,
    rd.data.losses$number_candidates,
    rd.data.losses$year
  )
  bw_fit <- rdbwselect(
    x, rd.data.losses$victory_marg,
    p = 1, c = 0, kernel = "tri", bwselect = "mserd",
    covs = covariates
  )
  # Use the first reported bandwidth for every row.
  rd.data.losses$bandwidth <- bw_fit$bws[1]

  # gen cutoff = 0
  rd.data.losses$cutoff <- 0
  # gen kw = 1-(abs(cutoff-victory_marg))/bandwidth
  rd.data.losses$kw <- 1 - (abs(rd.data.losses$cutoff - rd.data.losses$victory_marg)) / rd.data.losses$bandwidth
  # replace kw = 0 if (victory_marg>cutoff+bandwidth | victory_marg<cutoff-bandwidth)
  rd.data.losses$kw[rd.data.losses$victory_marg > (rd.data.losses$cutoff + rd.data.losses$bandwidth) | rd.data.losses$victory_marg < (rd.data.losses$cutoff - rd.data.losses$bandwidth)] <- 0

  # The outcome, the data and the weights are all restricted with the same
  # mask so they stay row-aligned.
  felm(x[rd.data.losses$kw>0] ~ won_election  + victory_marg + f_x_win + running_terms_served + dem + rep + spc_elec + term_length + number_candidates | year + state | 0 | state, data = rd.data.losses[rd.data.losses$kw>0,], weights = rd.data.losses$kw[rd.data.losses$kw>0])
}


# Fit the previous-loss specification once per career outcome. The odd model
# numbers (m1, m3, m5, m7) are kept as they are.
# Outcome: ever_ranprim_h
m1<-main.specification.addl(rd.data.losses$ever_ranprim_h)

# Outcome: ever_winprim_h
m3<-main.specification.addl(rd.data.losses$ever_winprim_h)

# Outcome: ever_runhouse
m5<-main.specification.addl(rd.data.losses$ever_runhouse)

# Outcome: ever_winhouse
m7<-main.specification.addl(rd.data.losses$ever_winhouse)


# Write the four models to a LaTeX table. `keep` restricts the displayed
# coefficients to those matching "won_election", and `keep.stat` limits the
# reported statistics to the number of observations and R-squared.
stargazer(list(m1, m3, m5,m7),
          title = "Effect of state legislative service on career progression to Congressional candidacy and representation: Sample of candidates with a previous loss",
          style="apsr",
          font.size =  "scriptsize",
          model.numbers=T,
          out="7tex/manuscript/tables/sourcefiles/Appendix Table 7.tex",
          label = "tab:main_results_prevlosses",
          keep.stat=c("n","rsq"),
          dep.var.labels  = "Ever:",
          column.labels=c("Ran for House primary", "Won House primary", "Ran for House general", "Won House general"),
          keep = c("won_election"),
          covariate.labels = c("Won state legis. seat"),
          notes = "\\parbox[t]{\\linewidth}{This table reports estimates of the effect of an additional state legislative term on individuals' career progression to Congress among a sample of candidates who previously lost a state legislature election. The dependent variable is equal to one if the candidate ever runs in the election listed in the column header and is zero otherwise. The sample contains all first-time state legislative elections within the optimal bandwidth based on the \\cite{CCT} algorithm. All regressions include state and election year fixed effects, a linear term in the election margin as well as its interaction with the indicator for having won, and the full set of candidate and election controls. Estimations are triangular kernel-weighted.  Standard errors clustered by state are reported in parentheses.}" # *** = significant at 1 percent level, ** = significant at 5 percent level, * = significant at 10 percent level.
)


### by number of previous terms served
# Inspect the raw tallies first.
table(rd.full$times_run)
table(rd.full$times_won)

# Keep rows with at least one recorded run.
has_run <- rd.full$times_run >= 1
rd.full <- rd.full[has_run, ]

# Wins before the current race = cumulative wins minus the current result.
rd.full$times_won_prev <- rd.full$times_won - rd.full$won_election
table(rd.full$times_won_prev)

# Keep rows with a non-negative count of previous wins.
nonneg_prev <- rd.full$times_won_prev >= 0
rd.full <- rd.full[nonneg_prev, ]
table(rd.full$times_won_prev)

# Top-code at 4, so the last category reads as "4 or more".
rd.full$times_won_prev <- replace(
  rd.full$times_won_prev,
  rd.full$times_won_prev > 4,
  4
)
table(rd.full$times_won_prev)

# Main RD specification for candidates with a given number of previous wins.
#
# Selects a bandwidth with rdbwselect() on the rows where
# `times_won_prev == times` (local linear, triangular kernel, "mserd"), builds
# triangular kernel weights around a cutoff of 0, and fits a kernel-weighted
# felm() regression on the rows that are both inside the bandwidth and in the
# requested subsample, with year and state fixed effects and standard errors
# clustered by state.
#
# Args:
#   x:     outcome vector, row-aligned with the global `rd.full`.
#   times: value of `rd.full$times_won_prev` that defines the subsample.
# Returns:
#   The fitted felm object.
main.specification.addl <- function(x, times) {
  # Assigning to `rd.full` inside the function creates a local copy; the
  # global data frame is left untouched. Clear any stale helper columns.
  rd.full$cutoff <- NULL
  rd.full$bandwidth <- NULL
  rd.full$kw <- NULL

  # rdbwselect() expects the covariates as a matrix with one column per
  # control. Adding the vectors with `+` collapsed them into a single column
  # holding their arithmetic sum, so the controls were not actually used as
  # separate covariates when selecting the bandwidth.
  covariates <- cbind(
    rd.full$running_terms_served,
    rd.full$dem,
    rd.full$rep,
    rd.full$spc_elec,
    rd.full$term_length,
    rd.full$number_candidates,
    rd.full$year
  )
  bw_fit <- rdbwselect(
    x, rd.full$victory_marg,
    p = 1, c = 0, kernel = "tri", bwselect = "mserd",
    covs = covariates,
    subset = rd.full$times_won_prev == times
  )
  # Use the first reported bandwidth for every row.
  rd.full$bandwidth <- bw_fit$bws[1]

  # gen cutoff = 0
  rd.full$cutoff <- 0
  # gen kw = 1-(abs(cutoff-victory_marg))/bandwidth
  rd.full$kw <- 1 - (abs(rd.full$cutoff - rd.full$victory_marg)) / rd.full$bandwidth
  # replace kw = 0 if (victory_marg>cutoff+bandwidth | victory_marg<cutoff-bandwidth)
  rd.full$kw[rd.full$victory_marg > (rd.full$cutoff + rd.full$bandwidth) | rd.full$victory_marg < (rd.full$cutoff - rd.full$bandwidth)] <- 0

  # The outcome, the data and the weights are all restricted with the same
  # mask so they stay row-aligned.
  felm(x[rd.full$kw>0 & rd.full$times_won_prev == times] ~ won_election  + victory_marg + f_x_win + running_terms_served + dem + rep + spc_elec + term_length + number_candidates | year + state | 0 | state, data = rd.full[rd.full$kw>0 & rd.full$times_won_prev == times,], weights = rd.full$kw[rd.full$kw>0 & rd.full$times_won_prev == times])
}


# Fit the specification once per previous-terms category (0 to 4), tidy each
# fit, tag it with its category in `sample` and with the outcome label in
# `model`, and keep only the `won_election` coefficient rows.
collect.won.estimates <- function(outcome, outcome_label) {
  per_sample <- lapply(0:4, function(n_prev) {
    fit <- main.specification.addl(outcome, n_prev)
    mutate(tidy(fit), sample = n_prev)
  })
  stacked <- mutate(bind_rows(per_sample), model = outcome_label)
  filter(stacked, term == "won_election")
}

list.models <- collect.won.estimates(
  rd.full$ever_ranprim_h, "ran in House primary"
)
list.models2 <- collect.won.estimates(
  rd.full$ever_winprim_h, "won House primary"
)
list.models3 <- collect.won.estimates(
  rd.full$ever_runhouse, "ran for House"
)
list.models4 <- collect.won.estimates(
  rd.full$ever_winhouse, "won House election"
)

# One long table: a row per outcome and previous-terms category.
models <- bind_rows(list.models, list.models2, list.models3, list.models4)


# # reshape it all here
# library(reshape)
# longbetas <- melt(betas, id=c("term","outcome"))
# longbetas$variable<-gsub("std.error","se",longbetas$variable)
# longbetas<-longbetas %>% separate(variable, c("measure", "sample"))
# longbetas$sample[is.na(longbetas$sample)]<-0
# 
# longbetas <- cast(longbetas, term+outcome+sample~measure, mean)
# # longbetas$sample<-as.numeric(longbetas$sample)+1
# longbetas$sample<-as.character(longbetas$sample)
# longbetas$sample[longbetas$sample=="4"]<-"4+"
# longbetas$lb<-longbetas$estimate - 1.96*longbetas$se
# longbetas$ub<-longbetas$estimate + 1.96*longbetas$se
# 
# longbetas <- longbetas %>% dplyr::rename(submodel = sample, model = outcome)



#then plug into ggplot

# Fix the order in which the four outcomes appear.
models$model <- factor(
  models$model,
  levels = c("ran in House primary", "won House primary", "ran for House", "won House election")
)

# Rearrange the columns for small_multiple(): the outcome label moves to
# `submodel`, the previous-terms category moves to `model`, and `term` gets a
# single display label.
models$term <- NULL
models <- dplyr::rename(models, submodel = model)
models <- dplyr::rename(models, model = sample)
models$term <- "Won Election"

# Keep the plot in a variable so it can be handed to ggsave() explicitly
# rather than relying on ggsave()'s implicit last_plot() default.
fig6 <- small_multiple(models, dot_args = list(aes(shape = submodel))) +
  #coord_flip() +
  theme_bw() +
  ylab("Coefficient Estimate") +
  geom_hline(yintercept = 0, colour = "grey60", linetype = 2) +
  xlab("Prev. Terms Served") +
  # scale_color_discrete(name="Previous\nTerms\nServed",
  #                     breaks=c(0:4),
  #                     labels=c(0:4)) +
  # scale_shape_discrete(name="Previous\nTerms\nServed",
  #                      breaks=c(0:4),
  #                      labels=c(0:4)) +
  theme(
    panel.grid.minor.y = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.x = element_blank(),
    legend.title = element_blank()
  )
# A bare top-level expression, so the figure is still displayed when the
# script is run interactively, as before.
fig6
ggsave(
  "7tex/manuscript/tables/sourcefiles/Figure 6.pdf",
  plot = fig6, units = "in", width = 8, height = 5
)

  


